library('tidyverse')
## Warning: 'timedatectl' indicates the non-existent timezone name 'n/a'
## Warning: Your system is mis-configured: '/etc/localtime' is not a symlink
## Warning: It is strongly recommended to set envionment variable TZ to 'Etc/UCT'
## (or equivalent)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.0     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.1     ✔ tibble    3.1.8
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the ]8;;http://conflicted.r-lib.org/conflicted package]8;; to force all conflicts to become errors
library('ballgown')
## 
## Attaching package: 'ballgown'
## 
## The following objects are masked from 'package:dplyr':
## 
##     contains, expr, last
## 
## The following object is masked from 'package:tidyr':
## 
##     contains
## 
## The following object is masked from 'package:ggplot2':
## 
##     expr
## 
## The following object is masked from 'package:base':
## 
##     structure
library('plotly')
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
ids = c("cdc5_1_S9","cdc5_2_S10","CmasM_1_S15","CmasM_2_S16","EhMyb10_1_S11","EhMyb10_2_S12","pEhEx_2_S8", "pEhEx1_S7")
type = c("EhCDC5-like","EhCDC5-like","EhCDC5-like+EhMyb10","EhCDC5-like+EhMyb10","EhMyb10","EhMyb10","Control","Control")
results = "~/ballgown_r2"
path = paste(results,ids,sep="/")
pheno_data = data.frame(ids,type,path)
pheno_data
bg2 = ballgown(dataDir = "~/ballgown_r2/", samplePattern = "_S", pData=pheno_data)
## Sun Aug 27 06:32:48 2023
## Sun Aug 27 06:32:48 2023: Reading linking tables
## Sun Aug 27 06:32:48 2023: Reading intron data files
## Sun Aug 27 06:32:48 2023: Merging intron data
## Sun Aug 27 06:32:48 2023: Reading exon data files
## Sun Aug 27 06:32:48 2023: Merging exon data
## Sun Aug 27 06:32:48 2023: Reading transcript data files
## Sun Aug 27 06:32:49 2023: Merging transcript data
## Wrapping up the results
## Sun Aug 27 06:32:49 2023
bg_table2 = texpr(bg2, meas="all")
bg_table2 %>% head()
pData(bg2)
bg_table2 %>% 
  select(c(t_name, num_exons, length), starts_with("FPKM")) %>%
  mutate_at(vars(starts_with("FPKM")), ~ log2(.x+1)) %>% 
  pivot_longer(cols=starts_with("FPKM"), values_to = "FPKM", names_to = "Sample") %>%
  mutate(Sample=str_replace(Sample, "FPKM.", "")) %>% 
  ggplot(aes(x=Sample, y=FPKM, fill=Sample)) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust=1)) -> p
ggplotly(p)
stat_results = stattest(bg2, feature='transcript', meas='FPKM', covariate='type')
stat_results %>% head()
stat_results %>% filter(qval<=0.06)
bg_table2 %>% 
  select(c(t_name, num_exons, length), starts_with("FPKM")) %>%
  mutate_at(vars(starts_with("FPKM")), ~ log2(.x+1)) %>% 
  pivot_longer(cols=starts_with("FPKM"), values_to = "FPKM", names_to = "Sample") %>%
  mutate(Sample=str_replace(Sample, "FPKM.", "")) %>%  
  mutate(Type = str_replace(Sample, "_*(1|2)_S\\d+", "")) %>%
  mutate(Type = str_replace(Type, "CmasM", "EhCDC5-like+EhMyb10")) %>%
  mutate(Type = str_replace(Type, "cdc5", "EhCDC5-like")) %>%
  mutate(Type = str_replace(Type, "pEhEx", "Control")) %>%
  group_by(t_name, Type) %>%
  summarise(FPKM=mean(FPKM))-> tmp_data
## `summarise()` has grouped output by 't_name'. You can override using the
## `.groups` argument.
tmp_data$Type = factor(tmp_data$Type, levels=c("Control","EhMyb10","EhCDC5-like","EhCDC5-like+EhMyb10"))
tmp_data %>%
  ggplot(aes(x=Type, y=FPKM, fill=Type)) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust=1)) -> p
ggplotly(p)
bg_table2 %>% 
  select(c(t_name, num_exons, length), starts_with("FPKM")) %>%
  mutate(mean_FPKM_pEhEx=(FPKM.pEhEx_2_S8+FPKM.pEhEx1_S7)/2) %>%
  mutate_at(vars(starts_with("FPKM")), ~ .x/mean_FPKM_pEhEx) %>% head()
bg_table2 %>% 
  select(c(t_name, num_exons, length), starts_with("FPKM")) %>%
  mutate(exprpEhEx=(FPKM.pEhEx_2_S8+FPKM.pEhEx1_S7)/2) %>%
  mutate_at(vars(starts_with("FPKM")), ~ .x/exprpEhEx) %>%
  mutate_at(vars(starts_with("FPKM")), ~ log2(.x)) %>% 
  pivot_longer(cols=starts_with("FPKM"), values_to = "log2FC", names_to = "Sample") %>% 
  mutate(Sample=str_replace(Sample, "FPKM.", "")) %>%
  mutate(Type = str_replace(Sample, "_*(1|2)_S\\d+", "")) %>% 
  mutate(Type = str_replace(Type, "CmasM", "EhCDC5-like+EhMyb10")) %>%
  mutate(Type = str_replace(Type, "cdc5", "EhCDC5-like")) %>%
  mutate(Type = str_replace(Type, "pEhEx", "Control")) %>%
  group_by(t_name, Type) %>%
  summarise(log2FC=mean(log2FC)) -> tmp_data
## `summarise()` has grouped output by 't_name'. You can override using the
## `.groups` argument.
tmp_data$Type = factor(tmp_data$Type, levels=c("Control","EhMyb10","EhCDC5-like","EhCDC5-like+EhMyb10"))
tmp_data %>%
  ggplot(aes(x=Type, y=log2FC, fill=Type)) + 
  geom_boxplot() + 
  theme(axis.text.x = element_text(angle = 20, vjust = 0.5, hjust=1)) -> p
ggplotly(p)
## Warning: Removed 8799 rows containing non-finite values (`stat_boxplot()`).
bg_table2 %>% 
  select(c(t_name, num_exons, length), starts_with("FPKM")) %>%
  mutate(exprpEhEx=(FPKM.pEhEx_2_S8+FPKM.pEhEx1_S7)/2) %>%
  mutate_at(vars(starts_with("FPKM")), ~ .x/exprpEhEx) %>%
  mutate_at(vars(starts_with("FPKM")), ~ log2(.x)) %>%
  pivot_longer(cols=starts_with("FPKM"), values_to = "log2FC", names_to = "Sample") %>%
  mutate(Sample=str_replace(Sample, "FPKM.", "")) %>%
  filter(!grepl("^U",Sample)) %>%
  ggplot(aes(x=exprpEhEx, y=log2FC, color=Sample)) +
  geom_point() + 
  scale_x_log10() +
  scale_color_brewer(palette = "Set1")
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 8213 rows containing missing values (`geom_point()`).

bg_table2 %>%
  select(c(t_name, gene_id, 
           num_exons, length), starts_with("FPKM")) %>%
  mutate(exprpEhEx=(FPKM.pEhEx_2_S8+FPKM.pEhEx1_S7)/2) %>%
  mutate_at(vars(starts_with("FPKM")), ~ .x/exprpEhEx) %>%
  mutate_at(vars(starts_with("FPKM")), ~ log2(.x)) %>%
  pivot_longer(cols=starts_with("FPKM"), values_to = "log2FC", names_to = "Sample") %>%
  mutate(Sample=str_replace(Sample, "FPKM.", "")) %>%
  filter(abs(log2FC)>5) -> expr_dif
expr_dif %>% 
  ggplot(aes(x=exprpEhEx, y=log2FC, color=Sample, text=t_name)) +
  geom_point() + 
  scale_x_log10() -> p
ggplotly(p)
## Warning: Transformation introduced infinite values in continuous x-axis
expr_dif %>% filter(num_exons>1 & is.finite(log2FC)) %>% arrange(log2FC)# %>% head()

Transcripts of interest

bg_table2 %>% filter(grepl("EHI_153670|EHI_139370|EHI_200710|EHI_158190|EHI_129760|EHI_181610|EHI_167620|EHI_087610",t_name)) %>% select(t_name,gene_id) -> lookupGenesTable
plotMeans(lookupGenesTable$gene_id[grepl("EHI_200710",lookupGenesTable$t_name)], bg2, groupvar='type', meas='FPKM', colorby='transcript')

plotMeans(lookupGenesTable$gene_id[grepl("EHI_139370",lookupGenesTable$t_name)], bg2, groupvar='type', meas='FPKM', colorby='transcript')

plotMeans(lookupGenesTable$gene_id[grepl("EHI_158190",lookupGenesTable$t_name)], bg2, groupvar='type', meas='FPKM', colorby='transcript')

plotMeans(lookupGenesTable$gene_id[grepl("EHI_153670",lookupGenesTable$t_name)], bg2, groupvar='type', meas='FPKM', colorby='transcript')

plotMeans(lookupGenesTable$gene_id[grepl("EHI_129760",lookupGenesTable$t_name)], bg2, groupvar='type', meas='FPKM', colorby='transcript')

plotMeans(lookupGenesTable$gene_id[grepl("EHI_181610",lookupGenesTable$t_name)], bg2, groupvar='type', meas='FPKM', colorby='transcript')

plotMeans(lookupGenesTable$gene_id[grepl("EHI_167620",lookupGenesTable$t_name)], bg2, groupvar='type', meas='FPKM', colorby='transcript')

plotMeans(lookupGenesTable$gene_id[grepl("EHI_087610",lookupGenesTable$t_name)], bg2, groupvar='type', meas='FPKM', colorby='transcript')

plotMeans("MSTRG.2631", bg2, groupvar='type', meas='FPKM', colorby='transcript')